#----------------------------------------------
# Measuring Misperceptions: Limits of Party-Specific Stereotype Reports
# Orr and Huber, 2021, POQ
# Inputs: OrrHuber_POQ_StudySM_raw.csv and publicly available national survey files (ANES)
# Outputs: Replication of additional study referenced in the SM
# Figure A9
#----------------------------------------------


#----------------------------------------------
# Load Packages
#----------------------------------------------

library(readstata13)
library(dplyr)
library(ggplot2)
library(estimatr)


#----------------------------------------------
# Load data
#----------------------------------------------

dat <- read.csv("OrrHuber_POQ_StudySM_raw.csv", as.is = TRUE)
anes <- read.dta13("anes_timeseries_2016_Stata13.dta", nonint.factors = TRUE)


#----------------------------------------------
# Clean survey data
#----------------------------------------------

# If initial inferences did not sum to 100, respondents were given a second
# opportunity to answer the question. The following code identifies which
# respondents used this second opportunity to provide inferences which sum to 100.
# These responses will be uses in the analysis ("fixed") only if they sum to 100.

racefix <- which(c(dat$race_guess2_1 + dat$race_guess2_2 + dat$race_guess2_3 +
                     dat$race_guess2_4 + dat$race_guess2_5) == 100)
relfix <- which(c(dat$rel_guess2_1 + dat$rel_guess2_2 + dat$rel_guess2_3 +
                     dat$rel_guess2_4 + dat$rel_guess2_5) == 100)

# Any responses which do not sum to 100 will be dropped. The following code
# identifies responses to exclude from analysis.

racedrop <- which(c(dat$race_guess2_1 + dat$race_guess2_2 + dat$race_guess2_3 +
                     dat$race_guess2_4 + dat$race_guess2_5) != 100)
reldrop <- which(c(dat$rel_guess2_1 + dat$rel_guess2_2 + dat$rel_guess2_3 +
                     dat$rel_guess2_4 + dat$rel_guess2_5) != 100)


# Rename variables
dat <- rename(dat, 
               race_g_white = race_guess_1, race_g_black = race_guess_2, race_g_hispanic = race_guess_3,
               race_g_asian = race_guess_4, race_g_other = race_guess_5,
               rel_g_pro = rel_guess_1, rel_g_cath = rel_guess_2, rel_g_jewish = rel_guess_3,
               rel_g_ath = rel_guess_4, rel_g_other = rel_guess_5)

# Merge revised responses if revised responses summed to 100.
dat$race_g_white[racefix]  <- dat$race_guess2_1[racefix]
dat$race_g_black[racefix] <- dat$race_guess2_2[racefix]
dat$race_g_hispanic[racefix]<- dat$race_guess2_3[racefix]
dat$race_g_asian[racefix]<- dat$race_guess2_4[racefix]
dat$race_g_other[racefix]<- dat$race_guess2_5[racefix]
dat$rel_g_pro[relfix] <- dat$rel_guess2_1[relfix]
dat$rel_g_cath[relfix] <- dat$rel_guess2_2[relfix]
dat$rel_g_jewish[relfix] <- dat$rel_guess2_3[relfix]
dat$rel_g_ath[relfix] <- dat$rel_guess2_4[relfix]
dat$rel_g_other[relfix] <- dat$rel_guess2_5[relfix]


# Drop response if inferences did not sum to 100 on either attempt.
dat$race_g_white[racedrop]  <- NA
dat$race_g_black[racedrop] <- NA
dat$race_g_hispanic[racedrop]<- NA
dat$race_g_asian[racedrop]<- NA
dat$race_g_other[racedrop]<- NA
dat$rel_g_pro[reldrop] <- NA
dat$rel_g_cath[reldrop] <- NA
dat$rel_g_jewish[reldrop] <- NA
dat$rel_g_ath[reldrop] <- NA
dat$rel_g_other[reldrop] <- NA


#----------------------------------------------
# Estimate paty traits from ANES data
#----------------------------------------------

anes$pid2 <- recode(anes$V161158x, .default = NA_character_,
                  "1. Strong Democrat" = "D", "2. Not very strong Democract" = "D", "3. Independent-Democrat" = "D",
                  "7. Strong Republican" = "R", "6. Not very strong Republican" = "R", "5. Independent-Republican" = "R")

anes$race <- recode(anes$V161310x, .default = NA_character_,
                    "1. White, non-Hispanic" = "White, non-Hispanic",
                    "2. Black, non-Hispanic" = "Black, non-Hispanic", 
                    "3. Asian, native Hawaiian or other Pacif Islr,non-Hispanic" = "Asian",
                    "4. Native American or Alaska Native, non-Hispanic" = "Other race",
                    "5. Hispanic" = "Hispanic",
                    "6. Other non-Hispanic incl multiple races [WEB: blank 'Other' counted as a race]" = "Other race")

# Merge religious affiliation questions asked to respondents who do or do not attend services
anes$rel_merged <- as.character(anes$V161247a)
anes$rel_merged[anes$rel_merged == "-1. INAP, 2,-8,-9 in V161244/1 in V161244 and 5 in V161245 "] <- 
  as.character(anes$V161247b[anes$rel_merged == "-1. INAP, 2,-8,-9 in V161244/1 in V161244 and 5 in V161245 "])

anes$relig <- recode(anes$rel_merged, .default = NA_character_,
                         "1. Protestant" = "Protestant", "2. Catholic" = "Catholic", "3. Jewish" = "Jewish", 
                         "4. Other" = "Other", 
                         "-1. INAP, 2,-8,-9 in V161244 and 2,-8,-9 in V161246/1 in V161244 and 1-4,-8,-9 in V161245 " = "Not religious")


anes_pid <- group_by(anes[!is.na(anes$pid2),], pid2) %>%
  summarize(white = round(weighted.mean(race == "White, non-Hispanic", na.rm = TRUE, w = V160101), 3),
            black = round(weighted.mean(race == "Black, non-Hispanic", na.rm = TRUE, w = V160101), 3),
            hispanic = round(weighted.mean(race == "Hispanic", na.rm = TRUE, w = V160101), 3),
            asian = round(weighted.mean(race == "Asian", na.rm = TRUE, w = V160101), 3),
            race_other = round(weighted.mean(race == "Other race", na.rm = TRUE, w = V160101), 3),
            protestant = round(weighted.mean(relig == "Protestant", na.rm = TRUE, w = V160101), 3),
            catholic = round(weighted.mean(relig == "Catholic", na.rm = TRUE, w = V160101), 3),
            jewish = round(weighted.mean(relig == "Jewish", na.rm = TRUE, w = V160101), 3),
            atheist = round(weighted.mean(relig == "Not religious", na.rm = TRUE, w = V160101), 3),
            rel_other = round(weighted.mean(relig == "Other", na.rm = TRUE, w = V160101), 3)) %>% 
  t() %>% 
  as.data.frame()
colnames(anes_pid) <- c("Dem", "Rep")
anes_pid <- anes_pid[-1,]
anes_pid[] <- apply(anes_pid, 2, function(x) as.numeric(as.character(x))*100)


#----------------------------------------------
# Figure A9
#----------------------------------------------

gaps_plotdat <- cbind(
  data.frame(rbind(
    summary(lm_robust(race_g_white ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    summary(lm_robust(race_g_black ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    summary(lm_robust(race_g_hispanic ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    summary(lm_robust(race_g_asian ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    summary(lm_robust(race_g_other ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    summary(lm_robust(rel_g_pro ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    summary(lm_robust(rel_g_cath ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    summary(lm_robust(rel_g_jewish ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    summary(lm_robust(rel_g_ath ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    summary(lm_robust(rel_g_other ~ relevel(factor(pid), ref = "a Republican"), data = dat))$coefficients[2, 1:2],
    cbind(anes_pid$Dem - anes_pid$Rep, NA) 
  )),
  Comparison = "R vs D",
  Area = factor(rep(c("White", "Black", "Hispanic", "Asian", "Other race",
                      "Protestant", "Catholic", "Jewish", "Atheist or not religious", "Other religion"), 2),
                levels = c("White", "Black", "Hispanic", "Asian", "Other race",
                      "Protestant", "Catholic", "Jewish", "Atheist or not religious", "Other religion")[10:1]), 
  Source = c(rep("Average Estimate", 10), rep("Approximate Truth", 10))
)


pdf("FigureA9.pdf", height = 7.5, width = 5.8)
ggplot(gaps_plotdat, aes(y = Estimate, x = Area, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error), 
                width = 0, position = position_dodge(0.3)) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  labs(x = "", y = "Difference: Democrat - Republican") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin())
dev.off()
